In [1]:
import re
import numpy as np
from IPython.display import display as D
%rehashx

In [2]:
# Switch into the lecture directory; every later cell uses relative file names
# (lecture*, Lecture*.pdf, index.info) that resolve against this location.
%cd ~/courses/phys572_lasers/lecture/


/home/justin/courses/phys572_lasers/lecture

In [3]:
# Captures the one- or two-digit lecture number from names such as
# "lecture07.pdf"; IGNORECASE lets the upper-case ".PDF" extension match too.
pdfnum_re = re.compile(
    r"lecture([0-9]{1,2})\.pdf",
    flags=re.IGNORECASE,
)

In [4]:
# Capture the shell listing of the original lecture files as a list of names,
# then display it so the input set is visible in the notebook.
files = !ls lecture*
D(files)


['lecture01.PDF',
 'lecture02.PDF',
 'lecture03.PDF',
 'lecture04.PDF',
 'lecture05.PDF',
 'lecture06.PDF',
 'lecture07.pdf',
 'lecture08.pdf',
 'lecture09.pdf',
 'lecture10.pdf',
 'lecture11.pdf',
 'lecture12.pdf',
 'lecture13.pdf',
 'lecture14.pdf',
 'lecture15.pdf',
 'lecture16.pdf',
 'lecture17.pdf',
 'lecture18.pdf',
 'lecture19.pdf',
 'lecture20.pdf',
 'lecture21.pdf',
 'lecture22.pdf',
 'lecture23.pdf',
 'lecture24.pdf',
 'lecture25.pdf',
 'lecture26.pdf']

In [5]:
newfiles = []
idxnames = []
for f in files:
    filenum = int(pdfnum_re.findall(f)[0])
    pdfname = r"Lecture\ "+format(filenum, "02d")+".pdf" 
    idxname = r"Lecture "+format(filenum, "d")
    newfiles.append(pdfname)
    idxnames.append(idxname)
    !cp $f $pdfname
    print filenum, pdfname, idxname


1 Lecture\ 01.pdf Lecture 1
2 Lecture\ 02.pdf Lecture 2
3 Lecture\ 03.pdf Lecture 3
4 Lecture\ 04.pdf Lecture 4
5 Lecture\ 05.pdf Lecture 5
6 Lecture\ 06.pdf Lecture 6
7 Lecture\ 07.pdf Lecture 7
8 Lecture\ 08.pdf Lecture 8
9 Lecture\ 09.pdf Lecture 9
10 Lecture\ 10.pdf Lecture 10
11 Lecture\ 11.pdf Lecture 11
12 Lecture\ 12.pdf Lecture 12
13 Lecture\ 13.pdf Lecture 13
14 Lecture\ 14.pdf Lecture 14
15 Lecture\ 15.pdf Lecture 15
16 Lecture\ 16.pdf Lecture 16
17 Lecture\ 17.pdf Lecture 17
18 Lecture\ 18.pdf Lecture 18
19 Lecture\ 19.pdf Lecture 19
20 Lecture\ 20.pdf Lecture 20
21 Lecture\ 21.pdf Lecture 21
22 Lecture\ 22.pdf Lecture 22
23 Lecture\ 23.pdf Lecture 23
24 Lecture\ 24.pdf Lecture 24
25 Lecture\ 25.pdf Lecture 25
26 Lecture\ 26.pdf Lecture 26

Merge the PDFs into one. Note that `--rotateoversize false` keeps oversized pages from being rotated, which otherwise happened with these notes.

TODO: Detect the page size (or each file's page size) automatically and use that instead of the hard-coded `a4paper`, or choose something reasonable given the measurements.


In [6]:
# tempfile: intermediate merged PDF (removed in the last cell);
# outfile: final PDF that receives the bookmarks.
# NOTE(review): the name `tempfile` would shadow the stdlib module of the same
# name if that module were ever imported in this notebook.
tempfile = "lectures.tmp.pdf"
outfile = "PHYS_572_lectures.pdf"
# Join all renamed "Lecture NN.pdf" copies into $tempfile on A4 paper,
# with rotation of oversized pages switched off.
!pdfjoin --paper a4paper --rotateoversize false Lecture*.pdf --outfile $tempfile


          ----
  pdfjam: This is pdfjam version 2.08.
  pdfjam: Reading any site-wide or user-specific defaults...
          (none found)
  pdfjam: Effective call for this run of pdfjam:
          /usr/bin/pdfjam --fitpaper 'true' --rotateoversize 'true' --suffix joined --paper a4paper --rotateoversize 'false' --outfile lectures.tmp.pdf -- Lecture\ 01.pdf - Lecture\ 02.pdf - Lecture\ 03.pdf - Lecture\ 04.pdf - Lecture\ 05.pdf - Lecture\ 06.pdf - Lecture\ 07.pdf - Lecture\ 08.pdf - Lecture\ 09.pdf - Lecture\ 10.pdf - Lecture\ 11.pdf - Lecture\ 12.pdf - Lecture\ 13.pdf - Lecture\ 14.pdf - Lecture\ 15.pdf - Lecture\ 16.pdf - Lecture\ 17.pdf - Lecture\ 18.pdf - Lecture\ 19.pdf - Lecture\ 20.pdf - Lecture\ 21.pdf - Lecture\ 22.pdf - Lecture\ 23.pdf - Lecture\ 24.pdf - Lecture\ 25.pdf - Lecture\ 26.pdf - 
  pdfjam: Calling pdflatex...
  pdfjam: Finished.  Output was to 'lectures.tmp.pdf'.

Sort the files and the corresponding index entries.


In [7]:
# Order both lists by the zero-padded copy name so that file names and
# bookmark titles stay aligned with each other.
sortind = np.argsort(newfiles)
newfiles, idxnames = (
    [newfiles[k] for k in sortind],
    [idxnames[k] for k in sortind],
)
D(idxnames)


['Lecture 1',
 'Lecture 2',
 'Lecture 3',
 'Lecture 4',
 'Lecture 5',
 'Lecture 6',
 'Lecture 7',
 'Lecture 8',
 'Lecture 9',
 'Lecture 10',
 'Lecture 11',
 'Lecture 12',
 'Lecture 13',
 'Lecture 14',
 'Lecture 15',
 'Lecture 16',
 'Lecture 17',
 'Lecture 18',
 'Lecture 19',
 'Lecture 20',
 'Lecture 21',
 'Lecture 22',
 'Lecture 23',
 'Lecture 24',
 'Lecture 25',
 'Lecture 26']

In [8]:
r = re.compile(r"Pages:\s*([0-9]+)")
idxentries = []
pagenum = 1
for (f, name) in zip(files, idxnames):
    idxentry = r"[/Page " + format(pagenum,"d") \
        + r" /View [/XYZ null null null] /Title (" \
        + name \
        + r") /OUT pdfmark"
    idxentries.append(idxentry)
    pages_s = !pdfinfo $f | grep Pages
    pages = int(r.findall(pages_s[0])[0])
    pagenum += pages
idx = "\n".join(idxentries)

In [9]:
# Write the pdfmark bookmark entries to index.info for Ghostscript to read.
# open() replaces the Python-2-only file() builtin; it behaves the same here
# and keeps the cell runnable on Python 3.
with open("index.info", "w") as idxinfo:
    idxinfo.write(idx)

Add the index to the PDF; instructions for creating an index in a PDF file were obtained here:

http://linproject.blogspot.com/2012/06/adding-index-to-your-pdf-file.html


In [10]:
# Run the merged PDF through Ghostscript's pdfwrite device together with
# index.info, writing the result (with the index entries) to $outfile.
!gs -sDEVICE=pdfwrite -q -dBATCH -dNOPAUSE \
    -sOutputFile=$outfile index.info -f $tempfile

In [11]:
# Delete the intermediate merged PDF; -f keeps this quiet if it is already gone.
!rm -f $tempfile